{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Module Feedback\n",
    "\n",
    "This system records the responses of students on their learning experience at university.\n",
    "\n",
    "![rel](https://sqlzoo.net/w/images/7/71/Module-feedback.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading spark-stubs, spark-hive\n",
      "Adding Hive conf dir /opt/hive/conf to classpath\n",
      "Creating SparkSession\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "SLF4J: No SLF4J providers were found.\n",
      "SLF4J: Defaulting to no-operation (NOP) logger implementation\n",
      "SLF4J: See https://www.slf4j.org/codes.html#noProviders for further details.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<a target=\"_blank\" href=\"http://jupyter:4040\">Spark UI</a>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$                                  \n",
       "\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.types._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n",
       "\u001b[39m\n",
       "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.expressions.Window\n",
       "\n",
       "\u001b[39m\n",
       "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@2808f699"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import $ivy.`org.apache.spark::spark-sql:3.4.0`\n",
    "\n",
    "import org.apache.log4j.{Level, Logger}\n",
    "Logger.getLogger(\"org\").setLevel(Level.OFF)\n",
    "\n",
    "import org.apache.spark._\n",
    "import org.apache.spark.sql._\n",
    "import org.apache.spark.sql.types._\n",
    "import org.apache.spark.sql.functions._\n",
    "import org.apache.spark.sql.expressions.Window\n",
    "\n",
    "val spark = {\n",
    "    NotebookSparkSession.builder()\n",
    "    .progress(false)\n",
    "    .appName(\"app11\")\n",
    "    // .master(\"spark://192.168.31.31:7077\")\n",
    "    .master(\"local[*]\")\n",
    "    .config(\"spark.sql.warehouse.dir\", \n",
    "            \"hdfs://192.168.31.31:9000/user/hive/warehouse\") \n",
    "    .config(\"spark.cores.max\", \"4\") \n",
    "    .config(\"spark.executor.instances\", \"1\") \n",
    "    .config(\"spark.executor.cores\", \"2\") \n",
    "    .config(\"spark.executor.memory\", \"10g\") \n",
    "    .config(\"spark.shuffle.service.enabled\", \"false\") \n",
    "    .config(\"spark.dynamicAllocation.enabled\", \"false\") \n",
    "    .config(\"spark.sql.catalogImplementation\", \"hive\")\n",
    "    .config(\"spark.sql.repl.eagerEval.enabled\", \"true\")\n",
    "    .config(\"spark.driver.allowMultipleContexts\", \"true\")\n",
    "    .getOrCreate()\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n",
       "\u001b[39m\n",
       "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m\n",
       "\u001b[36mhiveCxt\u001b[39m: \u001b[32msql\u001b[39m.\u001b[32mhive\u001b[39m.\u001b[32mHiveContext\u001b[39m = org.apache.spark.sql.hive.HiveContext@1db6496"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import spark.implicits._\n",
    "def sc = spark.sparkContext\n",
    "val hiveCxt = new org.apache.spark.sql.hive.HiveContext(sc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Credit to Aivean\n",
    "implicit class RichDF(val ds:DataFrame) {\n",
    "    def showHTML(limit: Int = 50, truncate: Int = 100) = {\n",
    "        import xml.Utility.escape\n",
    "        val data = ds.take(limit)\n",
    "        val header = ds.schema.fieldNames.toSeq        \n",
    "        val rows: Seq[Seq[String]] = data.map { row =>\n",
    "          row.toSeq.map {cell =>\n",
    "            val str = cell match {\n",
    "              case null => \"null\"\n",
    "              case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n",
    "              case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n",
    "              case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n",
    "              case _ => cell.toString\n",
    "            }\n",
    "            if (truncate > 0 && str.length > truncate) {\n",
    "              // do not show ellipses for strings shorter than 4 characters.\n",
    "              if (truncate < 4) str.substring(0, truncate)\n",
    "              else str.substring(0, truncate - 3) + \"...\"\n",
    "            } else {\n",
    "              str\n",
    "            }\n",
    "          }: Seq[String]\n",
    "        }\n",
    "    publish.html(s\"\"\" <table>\n",
    "                <tr>\n",
    "                 ${header.map(h => s\"<th>${escape(h)}</th>\").mkString}\n",
    "                </tr>\n",
    "                ${rows.map {row =>\n",
    "                  s\"<tr>${row.map{c => s\"<td>${escape(c)}</td>\" }.mkString}</tr>\"\n",
    "                }.mkString}\n",
    "            </table>\n",
    "        \"\"\")\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[36minsspr\u001b[39m: \u001b[32mDataFrame\u001b[39m = [spr_code: string, spr_fnm1: string ... 1 more field]\n",
       "\u001b[36minsmod\u001b[39m: \u001b[32mDataFrame\u001b[39m = [mod_code: string, mod_name: string ... 1 more field]\n",
       "\u001b[36minsprs\u001b[39m: \u001b[32mDataFrame\u001b[39m = [prs_code: string, prs_fnm1: string ... 1 more field]\n",
       "\u001b[36mcamsmo\u001b[39m: \u001b[32mDataFrame\u001b[39m = [spr_code: string, mod_code: string ... 2 more fields]\n",
       "\u001b[36minsres\u001b[39m: \u001b[32mDataFrame\u001b[39m = [spr_code: string, mod_code: string ... 4 more fields]\n",
       "\u001b[36minsque\u001b[39m: \u001b[32mDataFrame\u001b[39m = [que_code: string, cat_code: string ... 2 more fields]\n",
       "\u001b[36minscat\u001b[39m: \u001b[32mDataFrame\u001b[39m = [cat_code: string, cat_name: string ... 1 more field]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "val insspr = hiveCxt.table(\"sqlzoo.INS_SPR\")\n",
    "val insmod = hiveCxt.table(\"sqlzoo.INS_MOD\")\n",
    "val insprs = hiveCxt.table(\"sqlzoo.INS_PRS\")\n",
    "val camsmo = hiveCxt.table(\"sqlzoo.CAM_SMO\")\n",
    "val insres = hiveCxt.table(\"sqlzoo.INS_RES\")\n",
    "val insque = hiveCxt.table(\"sqlzoo.INS_QUE\")\n",
    "val inscat = hiveCxt.table(\"sqlzoo.INS_CAT\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Find the student name from a matriculation number\n",
    "\n",
    "**Find the name of the student with number 50200100**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>SPR_FNM1</th><th>SPR_SURN</th>\n",
       "                </tr>\n",
       "                <tr><td>Tom</td><td>Cotton</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(insspr.filter(col(\"SPR_CODE\")===\"50200100\")\n",
    " .select(\"SPR_FNM1\", \"SPR_SURN\").showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Find the modules studied by a student\n",
    "\n",
    "**Show the module code and module name for modules studied by the student with number 50200100 in session 2016/7 TR1**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>MOD_CODE</th><th>MOD_NAME</th>\n",
       "                </tr>\n",
       "                <tr><td>CSN08101</td><td>Systems and Services</td></tr><tr><td>INF08104</td><td>Database Systems</td></tr><tr><td>SET08108</td><td>Software Development 2</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(camsmo.filter((col(\"SPR_CODE\")===\"50200100\") && \n",
    "               (trim(col(\"AYR_CODE\"))===\"2016/7\") && \n",
    "               (col(\"PSL_CODE\")===\"TR1\"))\n",
    " .join(insmod, \"MOD_CODE\")\n",
    " .select(\"MOD_CODE\", \"MOD_NAME\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Find the modules and module leader studied by a student\n",
    "\n",
    "**Show the module code and module name and details of the module leader for modules studied by the student with number 50200100 in session 2016/7 TR1**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>MOD_CODE</th><th>MOD_NAME</th><th>PRS_CODE</th><th>PRS_FNM1</th><th>PRS_SURN</th>\n",
       "                </tr>\n",
       "                <tr><td>CSN08101</td><td>Systems and Services</td><td>40000008</td><td>James</td><td>Jackson</td></tr><tr><td>INF08104</td><td>Database Systems</td><td>40000036</td><td>Andrew</td><td>Cumming</td></tr><tr><td>SET08108</td><td>Software Development 2</td><td>40000408</td><td>Neil</td><td>Urquhart</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(camsmo.filter((col(\"SPR_CODE\")===\"50200100\") && \n",
    "               (trim(col(\"AYR_CODE\"))===\"2016/7\") && \n",
    "               (col(\"PSL_CODE\")===\"TR1\"))\n",
    " .join(insmod, \"MOD_CODE\")\n",
    " .join(insprs, \"PRS_CODE\")\n",
    " .select(\"MOD_CODE\", \"MOD_NAME\", \"PRS_CODE\", \"PRS_FNM1\", \"PRS_SURN\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Show the scores for module SET08108\n",
    "\n",
    "**Show the Percentage of students who gave 4 or 5 to module SET08108 in session 2016/7 TR1**\n",
    "\n",
    "(note that this is not real data, these responses were randomly generated)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>QUE_CODE</th><th>QUE_TEXT</th><th>CAT_NAME</th><th>pct</th>\n",
       "                </tr>\n",
       "                <tr><td>1.1</td><td>Staff are good at explaining things.</td><td>Learning and Teaching</td><td>89.0</td></tr><tr><td>1.2</td><td>Staff made the subject interesting.</td><td>Learning and Teaching</td><td>82.0</td></tr><tr><td>1.3</td><td>The module was intellectually stimulating.</td><td>Learning and Teaching</td><td>82.0</td></tr><tr><td>1.4</td><td>The aims and objectives were clearly stated.</td><td>Learning and Teaching</td><td>89.0</td></tr><tr><td>1.5</td><td>The module was well-organised and ran smoothly.</td><td>Learning and Teaching</td><td>78.0</td></tr><tr><td>1.6</td><td>The pace was appropriate.</td><td>Learning and Teaching</td><td>80.0</td></tr><tr><td>1.7</td><td>The level was appropriate.</td><td>Learning and Teaching</td><td>82.0</td></tr><tr><td>1.8</td><td>The workload was managable.</td><td>Learning and Teaching</td><td>78.0</td></tr><tr><td>1.9</td><td>I was able to contact module staff when I needed to.</td><td>Learning and Teaching</td><td>76.0</td></tr><tr><td>2.1</td><td>The assessment requirements were clearly stated.</td><td>Assessment and Feedback</td><td>84.0</td></tr><tr><td>2.2</td><td>The criteria for marking was made clear to me.</td><td>Assessment and Feedback</td><td>82.0</td></tr><tr><td>2.3</td><td>I was well supported for the assessment.</td><td>Assessment and Feedback</td><td>80.0</td></tr><tr><td>2.4</td><td>I was provided with feedback that aided understanding.</td><td>Assessment and Feedback</td><td>80.0</td></tr><tr><td>2.5</td><td>Feedback was provided within three weeks of submission.</td><td>Assessment and Feedback</td><td>80.0</td></tr><tr><td>3.1</td><td>The module was well supported by moodle.</td><td>Learning Resources</td><td>73.0</td></tr><tr><td>3.2</td><td>The library resources met my needs.</td><td>Learning Resources</td><td>82.0</td></tr><tr><td>3.3</td><td>The IT resources met my needs.</td><td>Learning Resources</td><td>89.0</td></tr><tr><td>3.4</td><td>The rooms/facilities were of good quality.</td><td>Learning Resources</td><td>78.0</td></tr><tr><td>4.1</td><td>I was statisfied with the module.</td><td>Overall</td><td>89.0</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(insres.filter((col(\"MOD_CODE\")===\"SET08108\") && \n",
    "               (trim(col(\"AYR_CODE\"))===\"2016/7\") &&\n",
    "               (col(\"PSL_CODE\")===\"TR1\"))\n",
    " .withColumn(\"score\", floor(col(\"RES_VALU\")/4))\n",
    " .groupBy(\"QUE_CODE\")\n",
    " .agg(sum(\"score\").alias(\"score\"), \n",
    "      count(\"score\").alias(\"n\"))\n",
    " .withColumn(\"pct\", round(lit(100) * col(\"score\") / col(\"n\"), 0))\n",
    " .join(insque, \"QUE_CODE\")\n",
    " .join(inscat, \"CAT_CODE\")\n",
    " .select(\"QUE_CODE\", \"QUE_TEXT\", \"CAT_NAME\", \"pct\")\n",
    " .orderBy(\"QUE_CODE\")\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Show the frequency chart for module SET08108 for question 4.1\n",
    "\n",
    "**For each response 1-5 show the number of students who gave that response (Module SET08108, 2016/7, TR1)**\n",
    "\n",
    "(note that this is not real data, these responses were randomly generated)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       " <table>\n",
       "                <tr>\n",
       "                 <th>MOD_CODE</th><th>RES_VALU</th><th>count</th>\n",
       "                </tr>\n",
       "                <tr><td>SET08108</td><td>2</td><td>6</td></tr><tr><td>SET08108</td><td>5</td><td>39</td></tr><tr><td>SET08108</td><td>4</td><td>10</td></tr>\n",
       "            </table>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "(insres.filter((col(\"MOD_CODE\")===\"SET08108\") && \n",
    "               (trim(col(\"AYR_CODE\"))===\"2016/7\") &&\n",
    "               (col(\"PSL_CODE\")===\"TR1\") &&\n",
    "               (col(\"QUE_CODE\")===\"4.1\"))\n",
    " .groupBy(\"MOD_CODE\", \"RES_VALU\")\n",
    " .count()\n",
    " .showHTML())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Scala",
   "language": "scala",
   "name": "scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".sc",
   "mimetype": "text/x-scala",
   "name": "scala",
   "nbconvert_exporter": "script",
   "version": "2.12.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
